This is a Photizo example tutorial, using raw spatially resolved microspectroscopy data collected from 10 µm brain sections, where 3 are from cases with neurological disease and 3 are from controls. This data is available on Zenodo - to find more information about this and to view a list of required packages and versions, please see our github.
import photizo.preprocessing as pzp
import photizo.quantification as pzq
import photizo.visualisation as pzv
import numpy as np
import scanpy as sc
import pandas as pd
import anndata
import seaborn as sns
import matplotlib.pyplot as plt
sc.set_figure_params(scanpy=True, dpi=150, dpi_save=300, frameon=True, vector_friendly=True, fontsize=14, figsize=[8, 6], color_map=None, format='pdf', facecolor=None, transparent=False, ipython_format='png2x')
samples = ['/well/dendrou/projects/uFTIR/Offline_analysis/data/raw_data/OPUS_exported_data/Diamond_unbinned_Cut_BLcorrected_3it/C025_2x8V_pre-processed_nobinning.txt',
'/well/dendrou/projects/uFTIR/Offline_analysis/data/raw_data/OPUS_exported_data/Diamond_unbinned_Cut_BLcorrected_3it/C036_2x8H_pre-processed_nobinning.txt',
'/well/dendrou/projects/uFTIR/Offline_analysis/data/raw_data/OPUS_exported_data/Diamond_unbinned_Cut_BLcorrected_3it/C054_2x8V_pre-processed_nobinning.txt',
'/well/dendrou/projects/uFTIR/Offline_analysis/data/raw_data/OPUS_exported_data/Diamond_unbinned_Cut_BLcorrected_3it/MS352_2x8V_B2_L1_pre-processed_4x4.txt',
'/well/dendrou/projects/uFTIR/Offline_analysis/data/raw_data/OPUS_exported_data/Diamond_unbinned_Cut_BLcorrected_3it/MS352_2x8V_B2_L2_pre-processed_nobinning.txt',
'/well/dendrou/projects/uFTIR/Offline_analysis/data/raw_data/OPUS_exported_data/Diamond_unbinned_Cut_BLcorrected_3it/MS425_1x8H_B2_pre-processed_nobinning.txt']
name = ['C025',
'C036',
'C054',
'MS352_L1',
'MS352_L2',
'MS425']
group = ['Control',
'Control',
'Control',
'Case',
'Case',
'Case']
sex = ['M',
'M',
'M',
'M',
'M',
'F']
pixels = [[64, 256],
[256, 64],
[64, 256],
[64, 256],
[64, 256],
[256, 32]]
for sn in range (0,len(samples)):
XY = np.loadtxt('%s' % samples[sn]) #Load sample
X = XY[:,0]
Y = XY[:,1:] #Separate wavenumbers (X) from spectra (Y)
X,Y = pzp.orderVectors(X, Y)
X,Y = pzp.removeCO2(X, Y)
Y0, Yn0, idx = pzp.holeExclusion(np.shape(Y)[0], np.shape(Y)[1], X, Y)
np.savetxt('files/%s_HolesPositions.txt' % (name[sn]), idx, delimiter='\t')
Yn0VN = pzp.vectorNormalisation(Yn0)
dY = pzq.secondDerivative(X, Yn0VN.T,3)
np.savetxt('files/%s_noHoles.txt' % (name[sn]), Yn0VN, delimiter='\t')
np.savetxt('files/%s_noHoles_2nddY.txt' % (name[sn]), dY, delimiter='\t')
pzv.integrationVisualisation(Yn0VN, X, '%s Lipids' % name[sn], 2800, 3000) # Check integration region
Y0VNH = pzp.reinsertHoles(Yn0VN, idx) # Reinsert excluded spectra
pzv.spatialPlotROI(X, Y0VNH, pixels[sn][0], pixels[sn][1], 2800, 3000, name[sn]+'Lipids')
header = np.zeros(shape=(1, np.shape(dY)[1])).astype(np.str)
group_l = header.astype(np.str)
sex_l = header.astype(np.str)
header[:,:] = name[sn]
group_l[:,:] = group[sn]
sex_l[:,:] = sex[sn]
clindata = np.concatenate((header,group_l, sex_l), axis = 0)
if (sn == 0):
all_clindata = clindata
all_dy = dY
all_yn0vn = Yn0VN
else:
all_clindata = np.concatenate((all_clindata,clindata), axis = 1)
all_dy = np.concatenate((all_dy, dY), axis = 1)
all_yn0vn = np.concatenate((all_yn0vn, Yn0VN), axis = 1)
print ('sample %s done' % name[sn])
np.savetxt('files/Photizo_All_samples_noHoles.txt', all_yn0vn, delimiter='\t')
np.savetxt('files/Photizo_All_samples_noHoles_2nddY.txt', all_dy, delimiter='\t')
df = pd.DataFrame(data = all_clindata.T, columns = ['Sample_id', 'Group', 'Sex'])
df.to_csv('files/Photizo_ClinData_noHoles.txt')
sample C025 done
sample C036 done
sample C054 done
sample MS352_L1 done
sample MS352_L2 done
sample MS425 done
import photizo.pca as pzpca
EVR, PCs, fit = pzpca.PCA(X, all_dy.T)
pzpca.PCAprojection(1, 2, fit, EVR)
df = pd.DataFrame(data = all_clindata.T, columns = ['Sample_id', 'Group', 'Sex'])
df.to_csv('files/Photizo_ClinData_noHoles.txt')
Y = np.loadtxt('files/Photizo_All_samples_noHoles_2nddY.txt')
header = pd.read_csv('files/Photizo_ClinData_noHoles.txt')
np.shape(Y)
np.shape(header)
(78913, 4)
adata = anndata.AnnData(X= Y.T, obs= header)
/well/dendrou/users/dkv948/devel/skylake/python3-venv-skylake-3.7.4/lib/python3.7/site-packages/anndata/_core/anndata.py:119: ImplicitModificationWarning: Transforming to str index.
warnings.warn("Transforming to str index.", ImplicitModificationWarning)
Perform clustering
import photizo.clustering as pzcl
adata = pzcl.cluster(adata, n_neigh = 15, spread = 1.0, min_dist = 0.1, leiden_res = 0.1)
sc.pl.umap(adata, color=['leiden'], palette = 'Set2')
/well/dendrou/users/dkv948/devel/skylake/python3-venv-skylake-3.7.4/lib/python3.7/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key]) ... storing 'Sample_id' as categorical ... storing 'Group' as categorical ... storing 'Sex' as categorical
Explore other leiden resolutions and clinical data - ideal for inspection of batch effects
sc.tl.leiden(adata, key_added = 'leiden017', resolution = 0.17)
sc.pl.umap(adata, color=['leiden', 'leiden017', 'Sample_id'])
Save resolution of choice
sc.pl.umap(adata, color=['leiden017'], palette = 'Set2', save = 'UMAP_photizo.png')
WARNING: saving figure to file figures/umapUMAP_photizo.png
Visualise cluster distribution across samples
adata = anndata.AnnData(X= all_yn0vn.T, obs= adata.obs)
import matplotlib.pyplot as plt
stacked = pd.crosstab(adata.obs['Sample_id'], adata.obs['leiden017'], normalize='index')
stacked.plot.bar(stacked=True, cmap = 'Set2').legend(loc='lower right')
plt.savefig('Stacked_photizo.png', dpi = 300)
Average the spectra of each cluster
MeanC0 = np.mean(adata[adata.obs['leiden017']=='0'].X, axis = 0).reshape(-1,1)
MeanC1 = np.mean(adata[adata.obs['leiden017']=='1'].X, axis = 0).reshape(-1,1)
MeanC2 = np.mean(adata[adata.obs['leiden017']=='2'].X, axis = 0).reshape(-1,1)
MeanC3 = np.mean(adata[adata.obs['leiden017']=='3'].X, axis = 0).reshape(-1,1)
MeanC4 = np.mean(adata[adata.obs['leiden017']=='4'].X, axis = 0).reshape(-1,1)
MeanC5 = np.mean(adata[adata.obs['leiden017']=='5'].X, axis = 0).reshape(-1,1)
Mean_All = np.concatenate((MeanC0, MeanC1, MeanC2, MeanC3, MeanC4, MeanC5), axis = 1)
/well/dendrou/users/dkv948/devel/skylake/python3-venv-skylake-3.7.4/lib/python3.7/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if not is_categorical(df_full[k]):
Plot spectra in region of interest
import photizo.visualisation as pzvl
LipX, LipY = pzp.selectRegionXY(2800, 3045, X, Mean_All)
plt = pzvl.plotSpectra(LipY, LipX, 'Lipid Region')
Plot entire spectra per cluster
AllX, AllY = pzp.selectRegionXY(951, 3049, X, Mean_All)
pzvl.plotSpectra(AllY, AllX, 'Entire Spectra')
LipX, LipY = pzp.selectRegionXY(2800, 3000, X, Mean_All)
pzvl.plotSpectra(LipY, LipX, 'Lipid region')
Quantification analysis
import matplotlib.pyplot as plt
thisdict = [
"lightseagreen",
"darkorange",
"deeppink",
"yellowgreen",
"tan",
"grey"]
df = adata.obs[['Sample_id', 'leiden017', 'Group']]
df.columns = ['Patient', 'Clusters', 'Group']
matrix = adata.X.T
df['Lipids'] = pzq.adjustedIntegrate(2800,3000, X, matrix)
g = sns.violinplot(x="Clusters", y="Lipids", data=df, palette = thisdict)
g.set_title("Lipids (2800 - 3000 $\mathregular{cm^{-1}}$)")
g.set(ylabel=None)
g.set(xlabel=None)
plt.savefig('Lipid_content_example.png', dpi = 300)
plt.show()
/well/dendrou/users/dkv948/devel/skylake/python3-venv-skylake-3.7.4/lib/python3.7/site-packages/ipykernel_launcher.py:14: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df['ahelix'] = pzq.alphaHelix(X, matrix)
g = sns.violinplot(x="Group", y="ahelix", data=df, palette = sns.color_palette("seismic_r", 2))
g.set_title(r'$\alpha$' "-helix")
g.set(ylabel='%')
g.set(xlabel=None)
plt.show()
/well/dendrou/users/dkv948/devel/skylake/python3-venv-skylake-3.7.4/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy """Entry point for launching an IPython kernel.
Mapping cluster classification
image = adata.obs[adata.obs['Sample_id']=='MS352_L1']['leiden017'].to_numpy().astype(int)
holes = np.loadtxt('/well/dendrou/projects/uFTIR/Offline_analysis/data/Preprocessed_data/MS352_B2_L1_HolesPositions_4x4_2020-12-07 16-15-07.txt')
image = image.reshape((-1, 1))
holes = list(holes.astype(int))
unique, counts = np.unique(image, return_counts=True)
n=0
for n in range (0,len(holes)):
image = np.insert(image, holes[n], -99, axis = 0)
pzvl.clusterMap(pixels[3][0], pixels[3][1], image, 'MS352_L1')